{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\"><ul class=\"toc-item\"></ul></div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'inited_user=c92456b42af0ae27c9c78686d72a522e; __uuid=1681903455816.28; __gc_id=ef3eaea1f0d64db69c0eda34b31b65d2; _ga=GA1.1.2032041130.1681903463; need_bind_tel=false; access_system=C; user_roles=0; XSRF-TOKEN=yla4eXsCRdmCywqgAYcp_g; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1701256315,1703161365,1703176967; __tlog=1703176967458.93%7C00000000%7C00000000%7C00000000%7C00000000; imApp_0=1; hpo_role-sec_project=sec_project_liepin; hpo_sec_tenant=0; acw_tc=276077ca17031789904274379e1077078bdc0080e0ead6b4e8aa9b225270c6; UniqueKey=a6c3244c765a4bdf7ecd4408bb1e9b04; liepin_login_valid=0; lt_auth=7eZZbyEHmliv4XPagGNW4Kge29OvVz%2FJ8HpchE0Ih9G6UvCx4P%2FmQAOHqrQO%2FCoIqxpwcfwzMLf%2BNun2yXBK7EEU%2BFGnlZ6utf6k1X4eTuRnHuyflMXuqsjQQ5wtrXo6ykpgn2si0HU%3D; user_photo=5f8fa3a9dfb13a7dee343d4808u.png; user_name=%E5%A8%84%E7%81%BF; new_user=false; c_flag=3a25189da43014c45faf359db08d89e6; inited_user=c92456b42af0ae27c9c78686d72a522e; imId=b5e1273d5cb093f4200539188559567b; imId_0=b5e1273d5cb093f4200539188559567b; imClientId=b5e1273d5cb093f45964a9ee4b43b7b5; imClientId_0=b5e1273d5cb093f45964a9ee4b43b7b5; __session_seq=26; __uv_seq=31; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1703180315; __tlg_event_seq=372; fe_im_socketSequence_new_0=14_12_14; fe_im_connectJson_0=%7B%220_a6c3244c765a4bdf7ecd4408bb1e9b04%22%3A%7B%22socketConnect%22%3A%223%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; fe_im_opened_pages=; _ga_54YTJKWN86=GS1.1.1703178289.21.1.1703180318.0.0.0'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查询cookie是否可以用，建立登录cookie\n",
    "cookie=\"inited_user=c92456b42af0ae27c9c78686d72a522e; __uuid=1681903455816.28; __gc_id=ef3eaea1f0d64db69c0eda34b31b65d2; _ga=GA1.1.2032041130.1681903463; need_bind_tel=false; access_system=C; user_roles=0; XSRF-TOKEN=yla4eXsCRdmCywqgAYcp_g; Hm_lvt_a2647413544f5a04f00da7eee0d5e200=1701256315,1703161365,1703176967; __tlog=1703176967458.93%7C00000000%7C00000000%7C00000000%7C00000000; imApp_0=1; hpo_role-sec_project=sec_project_liepin; hpo_sec_tenant=0; acw_tc=276077ca17031789904274379e1077078bdc0080e0ead6b4e8aa9b225270c6; UniqueKey=a6c3244c765a4bdf7ecd4408bb1e9b04; liepin_login_valid=0; lt_auth=7eZZbyEHmliv4XPagGNW4Kge29OvVz%2FJ8HpchE0Ih9G6UvCx4P%2FmQAOHqrQO%2FCoIqxpwcfwzMLf%2BNun2yXBK7EEU%2BFGnlZ6utf6k1X4eTuRnHuyflMXuqsjQQ5wtrXo6ykpgn2si0HU%3D; user_photo=5f8fa3a9dfb13a7dee343d4808u.png; user_name=%E5%A8%84%E7%81%BF; new_user=false; c_flag=3a25189da43014c45faf359db08d89e6; inited_user=c92456b42af0ae27c9c78686d72a522e; imId=b5e1273d5cb093f4200539188559567b; imId_0=b5e1273d5cb093f4200539188559567b; imClientId=b5e1273d5cb093f45964a9ee4b43b7b5; imClientId_0=b5e1273d5cb093f45964a9ee4b43b7b5; __session_seq=26; __uv_seq=31; Hm_lpvt_a2647413544f5a04f00da7eee0d5e200=1703180315; __tlg_event_seq=372; fe_im_socketSequence_new_0=14_12_14; fe_im_connectJson_0=%7B%220_a6c3244c765a4bdf7ecd4408bb1e9b04%22%3A%7B%22socketConnect%22%3A%223%22%2C%22connectDomain%22%3A%22liepin.com%22%7D%7D; fe_im_opened_pages=; _ga_54YTJKWN86=GS1.1.1703178289.21.1.1703180318.0.0.0\"\n",
    "cookie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "登录cookie [REDACTED]\n",
      "这是第1页，接下来将先等待3秒...然后以继续抓取\n",
      "这是第2页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第3页，接下来将先等待7秒...然后以继续抓取\n",
      "这是第4页，接下来将先等待5秒...然后以继续抓取\n",
      "这是第5页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第6页，接下来将先等待9秒...然后以继续抓取\n",
      "这是第7页，接下来将先等待4秒...然后以继续抓取\n",
      "这是第8页，接下来将先等待5秒...然后以继续抓取\n"
     ]
    }
   ],
   "source": [
    "# 爬取数据（输入地区、职位、学历）\n",
    "import crawl_liepin\n",
    "广告策划_北京_result = crawl_liepin.crawl(城市=\"北京\",关键词=\"广告策划\",学历='本科',登录cookie=cookie)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.title</th>\n",
       "      <th>job.salary</th>\n",
       "      <th>job.dq</th>\n",
       "      <th>job.topJob</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>job.requireEduLevel</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>job.link</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>job.refreshTime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[3个月, 本科]</td>\n",
       "      <td>广告策划实习生</td>\n",
       "      <td>100-120元/天</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>万达传媒</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/1727866/</td>\n",
       "      <td>https://www.liepin.com/lptjob/60824047</td>\n",
       "      <td>李女士</td>\n",
       "      <td>高级人力资源经理</td>\n",
       "      <td>20231011143844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[6个月, 提供转正, 本科, 导演助理, 编导助理, 实习生, 纪录片实习]</td>\n",
       "      <td>编导助理</td>\n",
       "      <td>50-100元/天</td>\n",
       "      <td>北京-海淀区</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>五洲畅想国际传媒（北京）有限公司</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/555423/</td>\n",
       "      <td>https://www.liepin.com/lptjob/52032685</td>\n",
       "      <td>樊女士</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>20220819225845</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[企业品牌, 品牌营销, 自媒体, 公众号]</td>\n",
       "      <td>品牌宣传</td>\n",
       "      <td>8-15k</td>\n",
       "      <td>北京-东城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>诺文科风机(北京)有限公司</td>\n",
       "      <td>人工智能</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/8127145/</td>\n",
       "      <td>https://www.liepin.com/job/1961355953.shtml</td>\n",
       "      <td>屈女士</td>\n",
       "      <td></td>\n",
       "      <td>20231218112705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>影视宣传经理</td>\n",
       "      <td>10-18k·15薪</td>\n",
       "      <td>北京-西城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>欧桦文化</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>https://www.liepin.com/company/12441743/</td>\n",
       "      <td>https://www.liepin.com/job/1957676991.shtml</td>\n",
       "      <td>陈女士</td>\n",
       "      <td>人事专员</td>\n",
       "      <td>20230417183923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>影视宣发经理</td>\n",
       "      <td>9-13k·15薪</td>\n",
       "      <td>北京-西城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>欧桦文化</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>https://www.liepin.com/company/12441743/</td>\n",
       "      <td>https://www.liepin.com/job/1957946027.shtml</td>\n",
       "      <td>陈女士</td>\n",
       "      <td>人事专员</td>\n",
       "      <td>20230419161003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[广告策划, 市场策划, 市场推广, 市场营销, 广告媒介]</td>\n",
       "      <td>市场主管</td>\n",
       "      <td>7-10k·15薪</td>\n",
       "      <td>北京-海淀区</td>\n",
       "      <td>False</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>五棵松万达广场</td>\n",
       "      <td>其他商务服务业</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/13393903/</td>\n",
       "      <td>https://www.liepin.com/job/1963714801.shtml</td>\n",
       "      <td>宋先生</td>\n",
       "      <td>常务副总经理</td>\n",
       "      <td>20231219165035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>[海报设计, 创意设计, 物料设计]</td>\n",
       "      <td>平面设计</td>\n",
       "      <td>7-10k</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>福建保罗绿色投资集团</td>\n",
       "      <td>环保</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>https://www.liepin.com/company/13743107/</td>\n",
       "      <td>https://www.liepin.com/job/1963368375.shtml</td>\n",
       "      <td>丁女士</td>\n",
       "      <td>人资</td>\n",
       "      <td>20231204143544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[营销策划, 用户运营]</td>\n",
       "      <td>整合营销专家</td>\n",
       "      <td>25-45k·14薪</td>\n",
       "      <td>北京-大兴区</td>\n",
       "      <td>False</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>本科</td>\n",
       "      <td>已上市</td>\n",
       "      <td>京东商城</td>\n",
       "      <td>互联网</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>https://www.liepin.com/company/9747025/</td>\n",
       "      <td>https://www.liepin.com/job/1940418567.shtml</td>\n",
       "      <td>张女士</td>\n",
       "      <td>猎头顾问</td>\n",
       "      <td>20211123114823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[品牌文案]</td>\n",
       "      <td>文案</td>\n",
       "      <td>10-15k</td>\n",
       "      <td>北京-大兴区</td>\n",
       "      <td>False</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>本科</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>董到家食品科技(北京)有限公司</td>\n",
       "      <td>互联网</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/9564541/</td>\n",
       "      <td>https://www.liepin.com/job/1931768031.shtml</td>\n",
       "      <td>王女士</td>\n",
       "      <td>招聘</td>\n",
       "      <td>20200916161408</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>[市场营销, 广告策划, 广告媒介]</td>\n",
       "      <td>市场助理</td>\n",
       "      <td>6-10k</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>北京华大九天科技股份有限公司</td>\n",
       "      <td>电子/半导体/集成电路</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/699075/</td>\n",
       "      <td>https://www.liepin.com/job/1924069079.shtml</td>\n",
       "      <td>喻女士</td>\n",
       "      <td>部门经理</td>\n",
       "      <td>20220314154119</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>290 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 job.labels job.title  job.salary  job.dq  \\\n",
       "0                                 [3个月, 本科]   广告策划实习生  100-120元/天  北京-朝阳区   \n",
       "1   [6个月, 提供转正, 本科, 导演助理, 编导助理, 实习生, 纪录片实习]      编导助理   50-100元/天  北京-海淀区   \n",
       "2                    [企业品牌, 品牌营销, 自媒体, 公众号]      品牌宣传       8-15k  北京-东城区   \n",
       "3                                        []    影视宣传经理  10-18k·15薪  北京-西城区   \n",
       "4                                        []    影视宣发经理   9-13k·15薪  北京-西城区   \n",
       "..                                      ...       ...         ...     ...   \n",
       "5            [广告策划, 市场策划, 市场推广, 市场营销, 广告媒介]      市场主管   7-10k·15薪  北京-海淀区   \n",
       "6                        [海报设计, 创意设计, 物料设计]      平面设计       7-10k  北京-朝阳区   \n",
       "7                              [营销策划, 用户运营]    整合营销专家  25-45k·14薪  北京-大兴区   \n",
       "8                                    [品牌文案]        文案      10-15k  北京-大兴区   \n",
       "9                        [市场营销, 广告策划, 广告媒介]      市场助理       6-10k  北京-朝阳区   \n",
       "\n",
       "    job.topJob job.requireWorkYears job.requireEduLevel comp.compStage  \\\n",
       "0        False                  NaN                 NaN            NaN   \n",
       "1        False                  NaN                 NaN            NaN   \n",
       "2        False                 经验不限                  本科          融资未公开   \n",
       "3        False                 经验不限                  本科            NaN   \n",
       "4        False                 经验不限                统招本科            NaN   \n",
       "..         ...                  ...                 ...            ...   \n",
       "5        False                 3-5年                统招本科            NaN   \n",
       "6        False                 3-5年                  本科            NaN   \n",
       "7        False                5-10年                  本科            已上市   \n",
       "8        False                 1-3年                  本科          融资未公开   \n",
       "9        False                 1-3年                  本科            NaN   \n",
       "\n",
       "       comp.compName comp.compIndustry comp.compScale  \\\n",
       "0               万达传媒          广播/影视/录音       100-499人   \n",
       "1   五洲畅想国际传媒（北京）有限公司          广播/影视/录音         50-99人   \n",
       "2      诺文科风机(北京)有限公司              人工智能         50-99人   \n",
       "3               欧桦文化           在线社交/媒体          1-49人   \n",
       "4               欧桦文化           在线社交/媒体          1-49人   \n",
       "..               ...               ...            ...   \n",
       "5            五棵松万达广场           其他商务服务业       100-499人   \n",
       "6         福建保罗绿色投资集团                环保       500-999人   \n",
       "7               京东商城               互联网       10000人以上   \n",
       "8    董到家食品科技(北京)有限公司               互联网         50-99人   \n",
       "9     北京华大九天科技股份有限公司       电子/半导体/集成电路       100-499人   \n",
       "\n",
       "                                   comp.link  \\\n",
       "0    https://www.liepin.com/company/1727866/   \n",
       "1     https://www.liepin.com/company/555423/   \n",
       "2    https://www.liepin.com/company/8127145/   \n",
       "3   https://www.liepin.com/company/12441743/   \n",
       "4   https://www.liepin.com/company/12441743/   \n",
       "..                                       ...   \n",
       "5   https://www.liepin.com/company/13393903/   \n",
       "6   https://www.liepin.com/company/13743107/   \n",
       "7    https://www.liepin.com/company/9747025/   \n",
       "8    https://www.liepin.com/company/9564541/   \n",
       "9     https://www.liepin.com/company/699075/   \n",
       "\n",
       "                                       job.link recruiter.recruiterName  \\\n",
       "0        https://www.liepin.com/lptjob/60824047                     李女士   \n",
       "1        https://www.liepin.com/lptjob/52032685                     樊女士   \n",
       "2   https://www.liepin.com/job/1961355953.shtml                     屈女士   \n",
       "3   https://www.liepin.com/job/1957676991.shtml                     陈女士   \n",
       "4   https://www.liepin.com/job/1957946027.shtml                     陈女士   \n",
       "..                                          ...                     ...   \n",
       "5   https://www.liepin.com/job/1963714801.shtml                     宋先生   \n",
       "6   https://www.liepin.com/job/1963368375.shtml                     丁女士   \n",
       "7   https://www.liepin.com/job/1940418567.shtml                     张女士   \n",
       "8   https://www.liepin.com/job/1931768031.shtml                     王女士   \n",
       "9   https://www.liepin.com/job/1924069079.shtml                     喻女士   \n",
       "\n",
       "   recruiter.recruiterTitle job.refreshTime  \n",
       "0                  高级人力资源经理  20231011143844  \n",
       "1                      人事经理  20220819225845  \n",
       "2                            20231218112705  \n",
       "3                      人事专员  20230417183923  \n",
       "4                      人事专员  20230419161003  \n",
       "..                      ...             ...  \n",
       "5                    常务副总经理  20231219165035  \n",
       "6                        人资  20231204143544  \n",
       "7                      猎头顾问  20211123114823  \n",
       "8                        招聘  20200916161408  \n",
       "9                      部门经理  20220314154119  \n",
       "\n",
       "[290 rows x 16 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看爬取到的数据\n",
    "广告策划_北京_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "广告策划_北京_result.to_excel('广告策划_北京_本科.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>job.labels</th>\n",
       "      <th>job.title</th>\n",
       "      <th>job.salary</th>\n",
       "      <th>job.dq</th>\n",
       "      <th>job.topJob</th>\n",
       "      <th>job.requireWorkYears</th>\n",
       "      <th>job.requireEduLevel</th>\n",
       "      <th>comp.compStage</th>\n",
       "      <th>comp.compName</th>\n",
       "      <th>comp.compIndustry</th>\n",
       "      <th>comp.compScale</th>\n",
       "      <th>comp.link</th>\n",
       "      <th>job.link</th>\n",
       "      <th>recruiter.recruiterName</th>\n",
       "      <th>recruiter.recruiterTitle</th>\n",
       "      <th>job.refreshTime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>['3个月', '本科']</td>\n",
       "      <td>广告策划实习生</td>\n",
       "      <td>100-120元/天</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>万达传媒</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/1727866/</td>\n",
       "      <td>https://www.liepin.com/lptjob/60824047</td>\n",
       "      <td>李女士</td>\n",
       "      <td>高级人力资源经理</td>\n",
       "      <td>20231011143844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>['6个月', '提供转正', '本科', '导演助理', '编导助理', '实习生', '...</td>\n",
       "      <td>编导助理</td>\n",
       "      <td>50-100元/天</td>\n",
       "      <td>北京-海淀区</td>\n",
       "      <td>False</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>五洲畅想国际传媒（北京）有限公司</td>\n",
       "      <td>广播/影视/录音</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/555423/</td>\n",
       "      <td>https://www.liepin.com/lptjob/52032685</td>\n",
       "      <td>樊女士</td>\n",
       "      <td>人事经理</td>\n",
       "      <td>20220819225845</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>['企业品牌', '品牌营销', '自媒体', '公众号']</td>\n",
       "      <td>品牌宣传</td>\n",
       "      <td>8-15k</td>\n",
       "      <td>北京-东城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>诺文科风机(北京)有限公司</td>\n",
       "      <td>人工智能</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/8127145/</td>\n",
       "      <td>https://www.liepin.com/job/1961355953.shtml</td>\n",
       "      <td>屈女士</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20231218112705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>[]</td>\n",
       "      <td>影视宣传经理</td>\n",
       "      <td>10-18k·15薪</td>\n",
       "      <td>北京-西城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>欧桦文化</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>https://www.liepin.com/company/12441743/</td>\n",
       "      <td>https://www.liepin.com/job/1957676991.shtml</td>\n",
       "      <td>陈女士</td>\n",
       "      <td>人事专员</td>\n",
       "      <td>20230417183923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>[]</td>\n",
       "      <td>影视宣发经理</td>\n",
       "      <td>9-13k·15薪</td>\n",
       "      <td>北京-西城区</td>\n",
       "      <td>False</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>欧桦文化</td>\n",
       "      <td>在线社交/媒体</td>\n",
       "      <td>1-49人</td>\n",
       "      <td>https://www.liepin.com/company/12441743/</td>\n",
       "      <td>https://www.liepin.com/job/1957946027.shtml</td>\n",
       "      <td>陈女士</td>\n",
       "      <td>人事专员</td>\n",
       "      <td>20230419161003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>285</th>\n",
       "      <td>5</td>\n",
       "      <td>['广告策划', '市场策划', '市场推广', '市场营销', '广告媒介']</td>\n",
       "      <td>市场主管</td>\n",
       "      <td>7-10k·15薪</td>\n",
       "      <td>北京-海淀区</td>\n",
       "      <td>False</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>五棵松万达广场</td>\n",
       "      <td>其他商务服务业</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/13393903/</td>\n",
       "      <td>https://www.liepin.com/job/1963714801.shtml</td>\n",
       "      <td>宋先生</td>\n",
       "      <td>常务副总经理</td>\n",
       "      <td>20231219165035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>286</th>\n",
       "      <td>6</td>\n",
       "      <td>['海报设计', '创意设计', '物料设计']</td>\n",
       "      <td>平面设计</td>\n",
       "      <td>7-10k</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>福建保罗绿色投资集团</td>\n",
       "      <td>环保</td>\n",
       "      <td>500-999人</td>\n",
       "      <td>https://www.liepin.com/company/13743107/</td>\n",
       "      <td>https://www.liepin.com/job/1963368375.shtml</td>\n",
       "      <td>丁女士</td>\n",
       "      <td>人资</td>\n",
       "      <td>20231204143544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>7</td>\n",
       "      <td>['营销策划', '用户运营']</td>\n",
       "      <td>整合营销专家</td>\n",
       "      <td>25-45k·14薪</td>\n",
       "      <td>北京-大兴区</td>\n",
       "      <td>False</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>本科</td>\n",
       "      <td>已上市</td>\n",
       "      <td>京东商城</td>\n",
       "      <td>互联网</td>\n",
       "      <td>10000人以上</td>\n",
       "      <td>https://www.liepin.com/company/9747025/</td>\n",
       "      <td>https://www.liepin.com/job/1940418567.shtml</td>\n",
       "      <td>张女士</td>\n",
       "      <td>猎头顾问</td>\n",
       "      <td>20211123114823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>8</td>\n",
       "      <td>['品牌文案']</td>\n",
       "      <td>文案</td>\n",
       "      <td>10-15k</td>\n",
       "      <td>北京-大兴区</td>\n",
       "      <td>False</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>本科</td>\n",
       "      <td>融资未公开</td>\n",
       "      <td>董到家食品科技(北京)有限公司</td>\n",
       "      <td>互联网</td>\n",
       "      <td>50-99人</td>\n",
       "      <td>https://www.liepin.com/company/9564541/</td>\n",
       "      <td>https://www.liepin.com/job/1931768031.shtml</td>\n",
       "      <td>王女士</td>\n",
       "      <td>招聘</td>\n",
       "      <td>20200916161408</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>289</th>\n",
       "      <td>9</td>\n",
       "      <td>['市场营销', '广告策划', '广告媒介']</td>\n",
       "      <td>市场助理</td>\n",
       "      <td>6-10k</td>\n",
       "      <td>北京-朝阳区</td>\n",
       "      <td>False</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>本科</td>\n",
       "      <td>NaN</td>\n",
       "      <td>北京华大九天科技股份有限公司</td>\n",
       "      <td>电子/半导体/集成电路</td>\n",
       "      <td>100-499人</td>\n",
       "      <td>https://www.liepin.com/company/699075/</td>\n",
       "      <td>https://www.liepin.com/job/1924069079.shtml</td>\n",
       "      <td>喻女士</td>\n",
       "      <td>部门经理</td>\n",
       "      <td>20220314154119</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>290 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                         job.labels job.title  \\\n",
       "0             0                                      ['3个月', '本科']   广告策划实习生   \n",
       "1             1  ['6个月', '提供转正', '本科', '导演助理', '编导助理', '实习生', '...      编导助理   \n",
       "2             2                     ['企业品牌', '品牌营销', '自媒体', '公众号']      品牌宣传   \n",
       "3             3                                                 []    影视宣传经理   \n",
       "4             4                                                 []    影视宣发经理   \n",
       "..          ...                                                ...       ...   \n",
       "285           5           ['广告策划', '市场策划', '市场推广', '市场营销', '广告媒介']      市场主管   \n",
       "286           6                           ['海报设计', '创意设计', '物料设计']      平面设计   \n",
       "287           7                                   ['营销策划', '用户运营']    整合营销专家   \n",
       "288           8                                           ['品牌文案']        文案   \n",
       "289           9                           ['市场营销', '广告策划', '广告媒介']      市场助理   \n",
       "\n",
       "     job.salary  job.dq  job.topJob job.requireWorkYears job.requireEduLevel  \\\n",
       "0    100-120元/天  北京-朝阳区       False                  NaN                 NaN   \n",
       "1     50-100元/天  北京-海淀区       False                  NaN                 NaN   \n",
       "2         8-15k  北京-东城区       False                 经验不限                  本科   \n",
       "3    10-18k·15薪  北京-西城区       False                 经验不限                  本科   \n",
       "4     9-13k·15薪  北京-西城区       False                 经验不限                统招本科   \n",
       "..          ...     ...         ...                  ...                 ...   \n",
       "285   7-10k·15薪  北京-海淀区       False                 3-5年                统招本科   \n",
       "286       7-10k  北京-朝阳区       False                 3-5年                  本科   \n",
       "287  25-45k·14薪  北京-大兴区       False                5-10年                  本科   \n",
       "288      10-15k  北京-大兴区       False                 1-3年                  本科   \n",
       "289       6-10k  北京-朝阳区       False                 1-3年                  本科   \n",
       "\n",
       "    comp.compStage     comp.compName comp.compIndustry comp.compScale  \\\n",
       "0              NaN              万达传媒          广播/影视/录音       100-499人   \n",
       "1              NaN  五洲畅想国际传媒（北京）有限公司          广播/影视/录音         50-99人   \n",
       "2            融资未公开     诺文科风机(北京)有限公司              人工智能         50-99人   \n",
       "3              NaN              欧桦文化           在线社交/媒体          1-49人   \n",
       "4              NaN              欧桦文化           在线社交/媒体          1-49人   \n",
       "..             ...               ...               ...            ...   \n",
       "285            NaN           五棵松万达广场           其他商务服务业       100-499人   \n",
       "286            NaN        福建保罗绿色投资集团                环保       500-999人   \n",
       "287            已上市              京东商城               互联网       10000人以上   \n",
       "288          融资未公开   董到家食品科技(北京)有限公司               互联网         50-99人   \n",
       "289            NaN    北京华大九天科技股份有限公司       电子/半导体/集成电路       100-499人   \n",
       "\n",
       "                                    comp.link  \\\n",
       "0     https://www.liepin.com/company/1727866/   \n",
       "1      https://www.liepin.com/company/555423/   \n",
       "2     https://www.liepin.com/company/8127145/   \n",
       "3    https://www.liepin.com/company/12441743/   \n",
       "4    https://www.liepin.com/company/12441743/   \n",
       "..                                        ...   \n",
       "285  https://www.liepin.com/company/13393903/   \n",
       "286  https://www.liepin.com/company/13743107/   \n",
       "287   https://www.liepin.com/company/9747025/   \n",
       "288   https://www.liepin.com/company/9564541/   \n",
       "289    https://www.liepin.com/company/699075/   \n",
       "\n",
       "                                        job.link recruiter.recruiterName  \\\n",
       "0         https://www.liepin.com/lptjob/60824047                     李女士   \n",
       "1         https://www.liepin.com/lptjob/52032685                     樊女士   \n",
       "2    https://www.liepin.com/job/1961355953.shtml                     屈女士   \n",
       "3    https://www.liepin.com/job/1957676991.shtml                     陈女士   \n",
       "4    https://www.liepin.com/job/1957946027.shtml                     陈女士   \n",
       "..                                           ...                     ...   \n",
       "285  https://www.liepin.com/job/1963714801.shtml                     宋先生   \n",
       "286  https://www.liepin.com/job/1963368375.shtml                     丁女士   \n",
       "287  https://www.liepin.com/job/1940418567.shtml                     张女士   \n",
       "288  https://www.liepin.com/job/1931768031.shtml                     王女士   \n",
       "289  https://www.liepin.com/job/1924069079.shtml                     喻女士   \n",
       "\n",
       "    recruiter.recruiterTitle  job.refreshTime  \n",
       "0                   高级人力资源经理   20231011143844  \n",
       "1                       人事经理   20220819225845  \n",
       "2                        NaN   20231218112705  \n",
       "3                       人事专员   20230417183923  \n",
       "4                       人事专员   20230419161003  \n",
       "..                       ...              ...  \n",
       "285                   常务副总经理   20231219165035  \n",
       "286                       人资   20231204143544  \n",
       "287                     猎头顾问   20211123114823  \n",
       "288                       招聘   20200916161408  \n",
       "289                     部门经理   20220314154119  \n",
       "\n",
       "[290 rows x 17 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_excel('广告策划_北京_本科.xlsx')\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import HTMLSession\n",
    "\n",
    "# Seconds to wait per request, so one stalled connection cannot hang the whole crawl.\n",
    "REQUEST_TIMEOUT = 10\n",
    "\n",
    "session = HTMLSession()\n",
    "\n",
    "# Fetch every posting page and keep the text of its first <dd> element.\n",
    "# The list stays aligned with df row-for-row (it is assigned as a column later):\n",
    "# a page that cannot be fetched, or that has no <dd>, contributes None.\n",
    "职位介绍 = []\n",
    "for link in df['job.link']:\n",
    "    try:\n",
    "        page = session.get(link, timeout=REQUEST_TIMEOUT)\n",
    "    except OSError as err:\n",
    "        # requests' RequestException derives from IOError/OSError, so this covers\n",
    "        # connection errors, timeouts and malformed URLs without aborting the loop.\n",
    "        print(f'request failed for {link}: {err}')\n",
    "        职位介绍.append(None)\n",
    "        continue\n",
    "    dd_elements = page.html.find('dd')\n",
    "    # Indexing [0] on an empty result would raise IndexError and lose all prior work.\n",
    "    职位介绍.append(dd_elements[0].text if dd_elements else None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the scraped texts as a new column of df.\n",
    "# The list must hold exactly one entry per row of df, in row order.\n",
    "df['职位介绍'] = 职位介绍"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the enriched table to a new workbook.\n",
    "# index=False: the loaded sheet already contains an 'Unnamed: 0' column\n",
    "# (apparently a previously exported index); writing the index again would\n",
    "# add one more such column on every save/load round-trip.\n",
    "df.to_excel('广告策划_北京.xlsx', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
